# Loading packages and data
library(ggplot2)
library(ecodata)
library(lubridate)
library(dplyr)
library(stringr)
library(marmap) # bathymetry
library(RColorBrewer)
library(ggnewscale)
library(sf)
library(cowplot)
library(tidyverse)
library(ggpubr)
library(sf)
library(ggdist)
library(ggpubr)
library(wesanderson)
library(raster)
library(mgcv)
#library(glmTMB)
library(ggpmisc)
library(mgcViz)
library(gratia)

# CPUE data (no env covariates)
gt_data_model_cpue <- read.csv(here::here('data/catch_data/gt_data_model_cpue.csv'))

# Standardize column names, bin mesh size, and compute CPUE (catch per hour).
# note: Paul indicated to use small mesh
gt_data_model_cpue <- gt_data_model_cpue %>%
  rename_all(.funs = tolower) %>%
  # Mesh bin: small (<= 5.6) vs large (> 5.6). The original conditions
  # overlapped at exactly 5.6 (both <= and >=); case_when() takes the first
  # match, so 5.6 was always 'SM' -- made explicit here. Missing mesh sizes
  # now become a true NA rather than the string 'NA'.
  mutate(mesh_bin = case_when(mesh_size <= 5.6 ~ 'SM',
                              mesh_size > 5.6 ~ 'LG',
                              TRUE ~ NA_character_)) %>%
  mutate(cpue_hr = sum_gt_catch / effort_dur)

# Catch data: study fleet + observer records with environmental covariates
sfobs <- readRDS(here::here('data/catch_data/gold_tile_sf_ob_v1_temp_price.rds'))

sfob.env <- sfobs %>%
  # Mesh bin: the original conditions overlapped at exactly 5.6 (both <=
  # and >=); case_when() takes the first match, so 5.6 was always 'SM' --
  # made explicit. Missing mesh sizes become a true NA, not the string 'NA'.
  mutate(mesh_bin = case_when(mesh_size <= 5.6 ~ 'SM',
                              mesh_size > 5.6 ~ 'LG',
                              TRUE ~ NA_character_),
         cpue_hr = SUM_GT_CATCH/effort_dur) %>%
  filter(YEAR %in% 1998:2022 & mesh_bin == 'SM') %>%
  dplyr::select(DATE, YEAR, MONTH, YDAY, trip_id, hull_num, area, effort_dur,
                SUM_GT_CATCH, cpue_hr, mesh_size, mesh_bin, depth, start_lat,
                start_lon, bottomT, bottomT_avg, MIN_TEMP_C, MEAN_TEMP_C,
                MAX_TEMP_C, TEMP_VARIANCE, TEMP_DEVIATION, MEAN_DPTH_M,
                tri, sed) %>%
  mutate(YEAR = as.integer(YEAR)) %>%
  rename_all(.funs = tolower)

#write.csv(sfob.env, "C:/Users/stephanie.owen/Documents/tilefish_indicators/data/catch_data/sfob_env.csv", row.names=FALSE)


# All fishing areas present in the combined SF/observer data, alphabetical
areas <- sfob.env$area %>% unique() %>% sort()

# Annual totals: per-haul tilefish catch summed within each year
catch.tally.ann <- summarise(group_by(sfob.env, year),
                             ttl_sum = sum(sum_gt_catch))

# Length data from observer program (column names lower-cased)
lengths <- here::here('data/catch_data/gt_data_length_andy.csv') %>%
  read.csv() %>%
  rename_all(.funs = tolower)

# Recruitment estimates from 2021 report
recruit <- read.csv(here::here('data/assessment_data/tilefish_rec_estimate_2021.csv'))

# Merge SF/Obs annual catch totals with recruitment estimates.
# Joined on year instead of the original cbind(), which silently assumed
# both frames covered exactly the same years in the same row order -- any
# mismatch would have misaligned rows (or errored) without saying which
# years disagreed. Column layout is unchanged: recruit's columns + ttl_sum.
catch_recruit <- recruit %>%
  filter(year %in% 1998:2020) %>%
  left_join(catch.tally.ann %>% filter(year %in% 1998:2020),
            by = "year")

# Shapefiles for maps
wd <- here::here('shapefiles')
US.areas <- st_read(here::here('shapefiles/USA.shp'), quiet = TRUE)
canada.areas <- st_read(here::here('shapefiles/Canada.shp'), quiet = TRUE)
bts_strata <- st_read(here::here('shapefiles/NES_BOTTOM_TRAWL_STRATA.shp'),
                      quiet = TRUE)
# plot(bts_strata) # to see all bottom trawl strata

# Restrict bottom-trawl strata to the golden tilefish strata
gtf_strata <- filter(bts_strata,
                     STRATUMA %in% c('01030', '01040', '01070', '01080',
                                     '01110', '01120', '01140', '01150',
                                     '01670', '01680', '01710', '01720',
                                     '01750', '01760'))
# plot(gtf_strata)

# NOAA bathymetry for the NW Atlantic, fortified for ggplot contour layers
bathy <- fortify.bathy(marmap::getNOAA.bathy(-81, -58, 27, 46))

Tilefish data


Catch data

Year-class strength is broadly defined as the number of fish spawned or hatched in a given year (Ricker, 1975).

Figure 1. Sum of catch (not accounting for effort), across years. Light blue shaded region represents the temporal range of observer records and red shaded region represents temporal range of study fleet records. The ‘purple’ region is where they overlap. Note that 2000-2005 for observer records had low sample size/number of vessels for tilefish, making the shaded region likely the best region to use for analysis. The vertical dashed lines represent strong year classes for this species (Nesslage et al. 2021). Red asterisk marks year that stock was deemed ‘re-built’.

# tot_catch == total (sum_catch) across hauls. so if tallying up annually, 
# use sum_catch
# Strong year-classes: 1970, 1973, 1993, 1999, 2005, 2013

# Annual total catch with observer/study-fleet coverage bands.
# The bands and the '*' are drawn with annotate() rather than
# geom_rect(aes(...)): constants mapped inside aes() are drawn once per
# data row, so each semi-transparent rectangle was over-plotted ~25x and
# its effective alpha depended on the number of years in the data.
# The custom axis-text theme is applied AFTER ecodata::theme_facet();
# originally it came first and was wiped by the complete theme.
ggplot(catch.tally.ann, aes(x = factor(year), y = ttl_sum, group = 1)) +
  annotate("rect", xmin = '2007', xmax = '2022', ymin = -Inf, ymax = Inf,
           fill = 'red', alpha = 0.3) +        # study fleet coverage
  annotate("rect", xmin = '2000', xmax = '2022', ymin = -Inf, ymax = Inf,
           fill = 'lightblue', alpha = 0.5) +  # observer coverage
  geom_vline(xintercept = c('1993', '1999', '2005', '2013'), lty = 2) +
  geom_line(color = 'black', size = 1.5) +
  annotate("text", label = "*",
           x = 26, y = 14000, size = 8, colour = "red") + # stock 'rebuilt'
  xlab('Year') +
  ylab('Total sum tilefish catch') +
  # facet_wrap(~month)+
  ecodata::theme_facet() +
  theme(axis.text.x = element_text(color = 'black',
                                   size = 12, angle = 45, vjust = 1, hjust = 1))

CPUE

Figure 2. Catch-per-unit-effort for undirected trawl trips from the Study Fleet and observer program. Zeros have been added using species-association methodology (via the Jaccard index).

see here for example

# Mean small-mesh CPUE by year, one panel per data source
# (note: Paul indicated to use small mesh)
gt_data_model_cpue %>%
  filter(mesh_bin == 'SM') %>%
  summarise(mean_cpue = mean(cpue_hr), .by = c(year, source)) %>%
  ggplot(aes(x = year, y = mean_cpue)) +
  geom_line(lwd = 1) +
  facet_wrap(~source) +
  theme_bw()

# Same series with both data sources pooled
gt_data_model_cpue %>%
  filter(mesh_bin == 'SM') %>%
  summarise(mean_cpue = mean(cpue_hr), .by = year) %>%
  ggplot(aes(x = year, y = mean_cpue)) +
  geom_line(lwd = 1) +
  labs(title = 'Study fleet + Observer combined') +
  theme_bw()

Maps (catch)

Tilefish catch locations (study fleet/observer)

yrs <- sort(unique(gt_data_model_cpue$year))

# Map of binned mean CPUE for a single year.
# yr: a single year (one element of `yrs`); renamed from `yrs` so the
# parameter no longer shadows the global year vector it is drawn from.
# Changes vs original: the unused 4-year `bin` column is no longer
# computed, and theme_bw() is applied BEFORE the legend theme() tweaks --
# originally the complete theme came last and wiped them.
yrmap <- function(yr) {
  gt_data_model_cpue %>%
    filter(start_lat < 42.5 & depth_est > 50 & year == yr) %>%
    ggplot() +
    geom_sf(data = US.areas %>% st_as_sf(), color = 'gray20', fill = '#cbdbcd') +
    geom_contour(data = bathy,
                 aes(x = x, y = y, z = -1 * z),
                 breaks = c(50, 100, 150, 200, Inf),
                 size = 0.3,
                 col = 'darkgrey') +
    # ~10-minute grid cells of mean CPUE
    stat_summary_2d(aes(x = start_lon, y = start_lat, z = cpue_hr),
                    binwidth = c(0.16666, 0.16666)) +
    scale_fill_viridis_c() +
    coord_sf(xlim = c(-75, -65.5), ylim = c(36, 44), datum = sf::st_crs(4326)) +
    labs(x = '', y = '', fill = 'CPUE') +
    theme_bw() +
    theme(legend.position = "bottom",
          legend.key.size = unit(0.2, "cm"),
          legend.key.width = unit(1, "cm"))
}


# Render one CPUE map per year, each under its own markdown heading.
# Iterate over the values directly instead of 1:length(yrs) (which
# misbehaves when the vector is empty).
for (yr in yrs) {
  cat("\n#####", as.character(yr), "\n")
  print(yrmap(yr))
  cat("\n")
}
2000

2001

2002

2003

2004

2005

2006

2007

2008

2009

2010

2011

2012

2013

2014

2015

2016

2017

2018

2019

2020

2021

2022

Environmental data


The strong year classes for Golden Tilefish were 1993, 1999, 2005, and 2013. Some of the underlying oceanographic processes that may be related to recruitment can influence habitat, retention/displacement, and food availability. These are explored below.

Habitat

Tilefish occupy a very narrow band of habitat conditions. Therefore, temperature and salinity may be of interest.

SST
# SST
# Pre-computed sea-surface temperature time series for the golden tilefish
# bottom-trawl strata. Assumed to contain year, month, mean_sst and
# weighted_mean_sst columns (used by the lag/join code below) -- TODO
# confirm against the csv.
sst<-read.csv(here::here('data/sst/sst_ts_gtf_strata.csv'))
Maps (SST)
SST analysis
# SST by month with lag.
# lag() is purely positional, so make the time ordering explicit before
# lagging; the original relied on the csv already being sorted by
# year/month.
sst.lag <- sst %>%
  arrange(year, month) %>%
  mutate(mean_sst_lag2 = lag(weighted_mean_sst, 2),
         mean_sst_lag3 = lag(weighted_mean_sst, 3),
         mean_sst_lag6 = lag(weighted_mean_sst, 6))

# Monthly mean CPUE; depth > 50 m and CPUE in (0, 15) -- removes 2002 outlier
sfob <- sfob.env %>% filter(depth > 50, cpue_hr > 0, cpue_hr < 15) %>%
  group_by(year, month) %>%
  summarise(mean_cpue = mean(cpue_hr),
            mean_tri = mean(tri),
            mean_sed = mean(sed))

# Join SST (with lags) to monthly CPUE; drop months missing either series
sst.sfob <- dplyr::full_join(sst.lag, sfob, by = join_by(year, month)) %>%
  dplyr::select(year, month, mean_cpue, mean_tri, mean_sed, mean_sst,
                weighted_mean_sst, mean_sst_lag2, mean_sst_lag3,
                mean_sst_lag6) %>%
  tidyr::drop_na()



# Helper: scatter of monthly mean CPUE vs an SST column with a linear fit
# and the R^2 label from ggpubr::stat_cor(). `yvar` is a column name given
# as a string; the .data pronoun lets one function replace six copies of
# the same ggplot chain.
plot_sst_cpue <- function(data, yvar, title) {
  ggplot(data, aes(x = mean_cpue, y = .data[[yvar]])) +
    geom_point(color = 'black') +
    stat_smooth(method = "lm", formula = y ~ x, geom = "smooth") +
    xlab('Mean CPUE/hr') +
    ylab('Weighted mean SST') +
    labs(title = title) +
    stat_cor(aes(label = after_stat(rr.label))) +
    theme_bw()
}

## SST no lag
plot_sst_cpue(sst.sfob, "weighted_mean_sst", 'Mean SST no lag')

## SST summer (Jul-Sep)
plot_sst_cpue(sst.sfob %>% filter(month %in% c(7, 8, 9)),
              "weighted_mean_sst", 'Summer Mean SST')

## SST spring (Apr-Jun)
plot_sst_cpue(sst.sfob %>% filter(month %in% c(4, 5, 6)),
              "weighted_mean_sst", 'Spring Mean SST')

## 2, 3, and 6 month lags
plot_sst_cpue(sst.sfob, "mean_sst_lag2", 'Mean SST 2 month lag')
plot_sst_cpue(sst.sfob, "mean_sst_lag3", 'Mean SST 3 month lag')
plot_sst_cpue(sst.sfob, "mean_sst_lag6", 'Mean SST 6 month lag')

SST Correlation and GAM

# Correlation and plot
# Linear model with SST as the response and monthly mean CPUE as the
# predictor (note the direction: temperature regressed on CPUE, matching
# the captured summary output below).
lm_sst<-lm(weighted_mean_sst ~ mean_cpue, data=sst.sfob)
summary(lm_sst)
## 
## Call:
## lm(formula = weighted_mean_sst ~ mean_cpue, data = sst.sfob)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.238 -4.229 -1.693  4.493 13.612 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  16.5904     0.7941  20.892  < 2e-16 ***
## mean_cpue    -0.7602     0.1932  -3.934 0.000124 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.211 on 160 degrees of freedom
## Multiple R-squared:  0.0882, Adjusted R-squared:  0.0825 
## F-statistic: 15.48 on 1 and 160 DF,  p-value: 0.0001243
#plot(weighted_mean_sst ~ mean_cpue, data=sst.sfob, pch=1, col="dodgerblue") + abline(lm_sst)

# Pearson correlation between monthly mean CPUE and weighted mean SST
# (for simple regression this gives the same t statistic as the slope
# test above: t = -3.934, r ~ -0.30).
cor.test(sst.sfob$mean_cpue, sst.sfob$weighted_mean_sst, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  sst.sfob$mean_cpue and sst.sfob$weighted_mean_sst
## t = -3.934, df = 160, p-value = 0.0001243
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4314171 -0.1496340
## sample estimates:
##        cor 
## -0.2969773
# Scatter with OLS line, 95% CI band, and correlation annotation; note the
# axes are flipped relative to lm_sst (SST on x, CPUE on y).
ggscatter(sst.sfob, x = "weighted_mean_sst", y = "mean_cpue", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Weighted Mean SST", ylab = "Monthly Mean CPUE/hr",
          title = "Sea Surface Temperature")

Bottom Temperature

Figure 1. GLORYS vs in-situ bottom temperatures from study fleet vessels.

Figure 2. Bottom temperature (C) across years. Blue dots are in-situ data, red dots are from GLORYS.

# In-situ (study fleet) vs GLORYS bottom temperature; the identity line
# marks perfect agreement between the two sources.
ggplot2::ggplot(sfob.env, aes(x = bottomt, y = mean_temp_c)) +
  geom_point(colour = "blue", alpha = 0.1) +
  geom_abline(slope = 1, intercept = 0) +
  labs(x = 'Bottom Temp (SF)', y = 'Bottom Temp (GLORYS)') +
  theme_bw()

# Bottom temperature across years: blue = in-situ, red = GLORYS.
ggplot2::ggplot(sfob.env, aes(x = bottomt, y = year)) +
  geom_point(colour = "blue", alpha = 0.1) +
  geom_point(aes(x = mean_temp_c, y = year), colour = "red", alpha = 0.1) +
  labs(x = 'Bottom Temp', y = 'Year', color = 'Source') +
  theme_bw()

Maps (Bottom T)
# "Jet" palette for the temperature fill scale
jet.colors <- colorRampPalette(c("blue", "#007FFF", "cyan", "#7FFF7F",
                                 "yellow", "#FF7F00", "red", "#7F0000"))

# select just years with study fleet bottom temps, tilefish depths only
sf.bt <- sfob.env %>% filter(year > 2006 & depth > 50)
yrs <- sort(unique(sf.bt$year))

# Map of binned mean in-situ bottom temperature for a single year.
# yr: one element of `yrs`; renamed from `yrs` so the parameter no longer
# shadows the global year vector it is drawn from.
yrmap <- function(yr) {
  sf.bt %>% filter(year == yr) %>%
    ggplot() +
    geom_sf(data = US.areas %>% st_as_sf(), color = 'gray20', fill = '#cbdbcd') +
    geom_contour(data = bathy,
                 aes(x = x, y = y, z = -1 * z),
                 breaks = c(50, 100, 150, 200, Inf),
                 size = 0.3,
                 col = 'darkgrey') +
    # ~10-minute grid cells of mean bottom temperature
    stat_summary_2d(aes(x = start_lon, y = start_lat, z = bottomt),
                    binwidth = c(0.16666, 0.16666)) +
    scale_fill_gradientn(colors = jet.colors(20)) +
    coord_sf(xlim = c(-75, -65.5), ylim = c(36, 44), datum = sf::st_crs(4326)) +
    labs(x = '', y = '', fill = 'Bottom temperature (°C)') +
    theme_bw()
}


# Render one bottom-temperature map per year, each under its own heading.
# Iterate over values directly instead of 1:length(yrs).
for (yr in yrs) {
  cat("\n######", as.character(yr), "\n")
  print(yrmap(yr))
  cat("\n")
}
2007

2008

2009

2010

2011

2012

2013

2014

2015

2016

2017

2018

2019

2020

2021

2022

Bottom T analysis

The following figures compare in-situ bottom temperature from the study-fleet data set to the Study fleet and Observer tilefish catch data.

  • Note here temperatures are averaged across all depths > 50 for each month.
#filter by year, depth, cpue (includes min, max, and sd of bottomt by month)
# NOTE(review): after summarise(), the result is still grouped by year, so
# the lag() calls below operate WITHIN each year -- the first 2/3/6 months
# of every year get NA rather than pulling values across the year
# boundary. Confirm this reset-at-January behavior is intended; if not,
# ungroup() and arrange(year, month) before lagging.
df.lag.cpue = sfob.env %>% filter(year > 2006 & depth > 50 & cpue_hr > 0 & cpue_hr < 15) %>%
  group_by(year,month) %>% 
  summarise(mean_dpth = mean(depth),
            mean_bt = mean(bottomt),
            min_bt = min(bottomt),
            max_bt = max(bottomt),
            sd_bt = sd(bottomt),
            mean_tri = mean(tri),
            mean_sed = mean(sed),
            mean_cpue = mean(cpue_hr)) %>% 
  mutate(mean_bt_lag2 = lag(mean_bt,2),
         mean_bt_lag3 = lag(mean_bt,3), 
         mean_bt_lag6 = lag(mean_bt, 6))

#omit NA bottomt
df.lag.sfob <- df.lag.cpue[!(is.na(df.lag.cpue$mean_bt)), ]

# See what months have data
sort(unique(df.lag.sfob$month))
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12
hist(df.lag.sfob$month)  #winter/spring and summer/fall; summer only season with any non-zero relationship, except fall with 6 mo lag

# Helper: monthly mean CPUE vs a bottom-temperature column with a linear
# fit and R^2 label (ggpubr::stat_cor). `yvar` is a column name given as a
# string; the .data pronoun lets one function replace twelve copies of the
# same ggplot chain.
plot_bt_cpue <- function(data, yvar, title) {
  ggplot(data, aes(x = mean_cpue, y = .data[[yvar]])) +
    geom_point(color = 'black') +
    stat_smooth(method = "lm", formula = y ~ x, geom = "smooth") +
    xlab('Mean CPUE/hr') +
    ylab('Mean bottom temp (°C)') +
    labs(title = title) +
    stat_cor(aes(label = after_stat(rr.label))) +
    theme_bw()
}

# Season month sets
ws_months <- c(1, 2, 3, 4, 5, 6)    # winter/spring
sf_months <- c(7, 8, 9, 10, 11, 12) # summer/fall

## No lag
plot_bt_cpue(df.lag.sfob %>% filter(month %in% ws_months), "mean_bt",
             'Winter/spring no lag')
plot_bt_cpue(df.lag.sfob %>% filter(month %in% sf_months), "mean_bt",
             'Summer/fall no lag')
plot_bt_cpue(df.lag.sfob %>% filter(month %in% c(7, 8, 9)), "mean_bt",
             'Summer no lag')

## 2 month lag
plot_bt_cpue(df.lag.sfob %>% filter(month %in% ws_months), "mean_bt_lag2",
             'Winter/spring lag 2 months')
plot_bt_cpue(df.lag.sfob %>% filter(month %in% sf_months), "mean_bt_lag2",
             'Summer/fall lag 2 months')
plot_bt_cpue(df.lag.sfob %>% filter(month %in% c(7, 8, 9)), "mean_bt_lag2",
             'Summer lag 2 months')

## 3 month lag
plot_bt_cpue(df.lag.sfob %>% filter(month %in% ws_months), "mean_bt_lag3",
             'Winter/spring lag 3 months')
plot_bt_cpue(df.lag.sfob %>% filter(month %in% sf_months), "mean_bt_lag3",
             'Summer/fall lag 3 months')
plot_bt_cpue(df.lag.sfob %>% filter(month %in% c(7, 8, 9)), "mean_bt_lag3",
             'Summer lag 3 months')

## 6 month lag (no 6 month lag data for winter/spring)
plot_bt_cpue(df.lag.sfob %>% filter(month %in% sf_months), "mean_bt_lag6",
             'Summer/fall lag 6 months')
plot_bt_cpue(df.lag.sfob %>% filter(month %in% c(7, 8, 9)), "mean_bt_lag6",
             'Summer lag 6 months')
plot_bt_cpue(df.lag.sfob %>% filter(month %in% c(10, 11, 12)), "mean_bt_lag6",
             'Fall lag 6 months')

Correlation and figures for SF/observer catch data and in-situ bottom temperatures

# Linear model with mean bottom temperature as the response and monthly
# mean CPUE as the predictor; captured output below shows no significant
# relationship (p = 0.216).
lm_bt<-lm(mean_bt ~ mean_cpue, data=df.lag.sfob)
summary(lm_bt)
## 
## Call:
## lm(formula = mean_bt ~ mean_cpue, data = df.lag.sfob)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6026 -1.2897  0.1934  1.1972  3.0474 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.48160    0.30244  37.963   <2e-16 ***
## mean_cpue    0.09561    0.07684   1.244    0.216    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.71 on 121 degrees of freedom
## Multiple R-squared:  0.01263,    Adjusted R-squared:  0.004474 
## F-statistic: 1.548 on 1 and 121 DF,  p-value: 0.2158
#plot(mean_bt ~ mean_cpue, data=df.lag.sfob, pch=1, col="dodgerblue") + abline(lm_bt)

# Pearson correlation between monthly mean CPUE and mean bottom temp
# (r ~ 0.11, not significant -- same t statistic as the slope test above).
cor.test(df.lag.sfob$mean_cpue, df.lag.sfob$mean_bt, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  df.lag.sfob$mean_cpue and df.lag.sfob$mean_bt
## t = 1.2443, df = 121, p-value = 0.2158
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.0659437  0.2837900
## sample estimates:
##       cor 
## 0.1124029
# Scatter with OLS line, 95% CI band, and correlation annotation; axes are
# flipped relative to lm_bt (bottom temp on x, CPUE on y).
ggscatter(df.lag.sfob, x = "mean_bt", y = "mean_cpue", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean bottom temp (°C)", ylab = "Monthly Mean CPUE/hr",
          title = "In-situ Bottom Temperature")

Salinity

Here we explore salinity from the GLORYS reanalysis model at three different depths

  • 55 meters (relevant to larvae, recruits)
  • 110 meters (relevant recruits, juveniles)
  • 220 meters (relevant to juveniles, adults)
# Salinity 
Maps (Salinity)
# "Jet" palette for continuous fills
jet.colors <- colorRampPalette(c("blue", "#007FFF", "cyan", "#7FFF7F",
                                 "yellow", "#FF7F00", "red", "#7F0000"))

# b <- brick(here::here('data/salinity/dd_sal_55_2000_2009.tif'))
# b.00 <- b[,,,1:365]

# Years with study fleet bottom temps, tilefish depths only
sf.bt <- filter(sfob.env, year > 2006 & depth > 50)
yrs <- sf.bt$year %>% unique() %>% sort()

# Map skeleton for salinity by year. NOTE(review): the fill layer is still
# commented out, so this currently renders coastline + bathymetry only;
# the fill legend will not appear until a salinity layer is added.
# yr: one element of `yrs`; renamed from `yrs` to avoid shadowing the
# global year vector. Also fixes the 'Salinty' -> 'Salinity' label typo.
yrmap <- function(yr) {
  sf.bt %>% filter(year == yr) %>%
    ggplot() +
    geom_sf(data = US.areas %>% st_as_sf(), color = 'gray20', fill = '#cbdbcd') +
    geom_contour(data = bathy,
                 aes(x = x, y = y, z = -1 * z),
                 breaks = c(50, 100, 150, 200, Inf),
                 size = 0.3,
                 col = 'darkgrey') +
    # stat_summary_2d(aes(x=start_lon, y=start_lat, z = bottomt),
    #                 binwidth=c(0.16666,0.16666)) +
    #   scale_fill_gradientn(colors = jet.colors(20)) +
    coord_sf(xlim = c(-75, -65.5), ylim = c(36, 44), datum = sf::st_crs(4326)) +
    labs(x = '', y = '', fill = 'Salinity') +
    theme_bw()
}


# Render one (placeholder) salinity map per year, each under its own
# heading. Iterate over values directly instead of 1:length(yrs).
for (yr in yrs) {
  cat("\n######", as.character(yr), "\n")
  print(yrmap(yr))
  cat("\n")
}
2007

2008

2009

2010

2011

2012

2013

2014

2015

2016

2017

2018

2019

2020

2021

2022

Salinity analysis

Currents

Cross-shelf processes may influence the retention or displacement of tilefish during early life history stages. These are explored below.

Shelf water volume

Shelf water volume: A measure of the volume of water bounded inshore of the shelf-slope front. In this analysis, shelf water is defined as all water having salinity <34 psu. The position of the shelf-slope front varies inter-annually with the higher shelf water values indicating the front being pushed further towards the shelf break.

High shelf water volume: front pushed towards the shelf break; low shelf water volume: front pushed inshore (more slope water on the shelf).

Hypothesis: Higher recruitment success is correlated with years of higher shelf water volume in spring/summer. These months may be particularly important because that is when spawning occurs, and the position of the shelf-slope front may influence the position of larvae (carrying them away from spawning grounds).

Additional variables in this dataset are shelf water temperature and salinity which may also be indicative of habitat conditions.

# Shelf water volume
shlfvol <- read.csv(here::here('data/shelf_water_volume/ShelfWaterVolume_BSB_update.csv'))

# Wrangle date info: Year is fractional (year + fraction, e.g. "1998.123");
# keep the integer part as the year string, then convert day-of-year to a
# calendar date/month/day.
# The original grew `yrs` element-by-element in a loop (seeded with
# as.vector(nrow(shlfvol)) -- a length-1 vector holding the row count, not
# an n-length buffer) and re-split the WHOLE Year column on every
# iteration (O(n^2)). A single vectorized sub() produces the same result.
shlfvol$Year <- as.character(shlfvol$Year)
yrs <- sub("\\..*$", "", shlfvol$Year)   # strip the fractional part
shlfvol$year <- yrs
shlfvol <- shlfvol %>% mutate(date_ = as.Date(Year.Day - 1,
                                              origin = paste0(year, "-01-01")),
                              month = strftime(date_, "%m"),
                              day = strftime(date_, "%d"),
                              year = as.integer(year),
                              month = as.numeric(month))

Shelf water volume with Study fleet/Observer data

# Create shw vol by month w/lag
# NOTE(review): after summarise(), df.lag is still grouped by year, so the
# lag() calls operate WITHIN each year -- the first 2/3 months of every
# year get NA rather than pulling values across the year boundary.
# Confirm this reset-at-January behavior is intended; if not, ungroup()
# and arrange(year, month) before lagging.
df.lag = shlfvol %>%
  filter(year > 1997) %>%
  group_by(year,month) %>%
  summarise(mean_t = mean(ShW.T),
            mean_s = mean(ShW.S),
            mean_v = mean(ShW.Vol)) %>%
  mutate(mean_t_lag2 = lag(mean_t,2),
         mean_t_lag3 = lag(mean_t,3),
         mean_s_lag2 = lag(mean_s,2),
         mean_s_lag3 = lag(mean_s,3),
         mean_v_lag2 = lag(mean_v,2),
         mean_v_lag3 = lag(mean_v,3))

#filter sfob.env 
# Monthly mean CPUE and covariates; depth > 50 m, CPUE in (0, 15)
shlfvol.sfob = sfob.env %>% filter(depth > 50, cpue_hr > 0, cpue_hr<15) %>%
  group_by(year,month) %>% 
  summarise(mean_dpth = mean(depth),
            mean_bt = mean(bottomt),
            mean_cpue = mean(cpue_hr),
            mean_tri = mean(tri),
            mean_sed = mean(sed))

# Join with sf/ob data
# drop_na() keeps only year-months present (complete) in BOTH series
df.shlfvol.sfob = dplyr::full_join(shlfvol.sfob, df.lag, by = join_by(year, month)) %>%
  dplyr::select(year, month, mean_cpue, mean_tri, mean_sed, mean_t, mean_s, mean_v,
                mean_t_lag2, mean_v_lag2, mean_s_lag2,
                mean_t_lag3, mean_v_lag3, mean_s_lag3) %>%
  tidyr::drop_na()

#See what months have data
sort(unique(df.shlfvol.sfob$month))
## [1]  7  8  9 10 11
hist(df.shlfvol.sfob$month) #summer and fall

No lags

# Helper: monthly mean CPUE vs a shelf-water column with a linear fit and
# R^2 label (ggpubr::stat_cor). `yvar` is a column name given as a string;
# the .data pronoun lets one function replace five copies of the same
# ggplot chain.
plot_shw_cpue <- function(data, yvar, ylab, title) {
  ggplot(data, aes(x = mean_cpue, y = .data[[yvar]])) +
    geom_point(color = 'black') +
    stat_smooth(method = "lm", formula = y ~ x, geom = "smooth") +
    xlab('Mean CPUE/hr') +
    ylab(ylab) +
    labs(title = title) +
    stat_cor(aes(label = after_stat(rr.label))) +
    theme_bw()
}

## Shelf water volume: no lag, summer, fall
plot_shw_cpue(df.shlfvol.sfob, "mean_v",
              'Mean shelf water volume', 'Shelf water volume no lag')
plot_shw_cpue(df.shlfvol.sfob %>% filter(month %in% c(7, 8, 9)), "mean_v",
              'Mean shelf water volume', 'Shelf water volume summer')
plot_shw_cpue(df.shlfvol.sfob %>% filter(month %in% c(10, 11, 12)), "mean_v",
              'Mean shelf water volume', 'Shelf water volume fall')

## Shelf water temperature and salinity, no lag
plot_shw_cpue(df.shlfvol.sfob, "mean_t",
              'Mean shelf water temperature', 'Shelf water temperature no lag')
plot_shw_cpue(df.shlfvol.sfob, "mean_s",
              'Mean shelf water salinity', 'Shelf water salinity no lag')

# Helper: compare candidate functional forms (linear, quadratic, loess,
# GAM with default smooth and with k = 3) for CPUE vs a shelf-water
# column, restricted to summer/fall months of 2008 onward. Replaces three
# copies of the same 20-line chain that differed only in the y variable.
plot_shw_models <- function(yvar) {
  ggplot(df.shlfvol.sfob %>%
           filter(year > 2007 & month %in% c(7, 8, 9, 10, 11, 12)),
         aes(x = mean_cpue, y = .data[[yvar]])) +
    geom_point() +
    geom_smooth(method = "lm", formula = y ~ x, size = 1, se = FALSE,
                aes(colour = 'Linear')) +
    geom_smooth(method = "lm", formula = y ~ x + I(x^2),
                size = 1, se = FALSE, aes(colour = 'Quadratic')) +
    geom_smooth(method = "loess", formula = y ~ x,
                size = 1, se = FALSE, aes(colour = 'Loess')) +
    geom_smooth(method = "gam", formula = y ~ s(x),
                size = 1, se = FALSE, aes(colour = 'Gam')) +
    geom_smooth(method = "gam", formula = y ~ s(x, k = 3),
                size = 1, se = FALSE, aes(colour = 'Gam2')) +
    labs(title = 'SFOB x M.S.W Summer/Fall (2007:2021)') +
    scale_color_manual(name = 'Model',
                       breaks = c('Linear', 'Quadratic', 'Loess', 'Gam', 'Gam2'),
                       values = c('Linear' = 'black', 'Quadratic' = 'blue',
                                  'Loess' = 'red', 'Gam' = 'green',
                                  'Gam2' = 'purple')) +
    theme_bw()
}

plot_shw_models("mean_v")
plot_shw_models("mean_s")
plot_shw_models("mean_t")

2 month lags

# Helper: monthly mean CPUE vs a 2-month-lagged shelf-water column with a
# linear fit and R^2 label. `yvar` is a column name given as a string; the
# .data pronoun lets one function replace five copies of the same chain.
plot_shw_lag2 <- function(data, yvar, ylab, title) {
  ggplot(data, aes(x = mean_cpue, y = .data[[yvar]])) +
    geom_point(color = 'black') +
    stat_smooth(method = "lm", formula = y ~ x, geom = "smooth") +
    xlab('Mean CPUE/hr') +
    ylab(ylab) +
    labs(title = title) +
    stat_cor(aes(label = after_stat(rr.label))) +
    theme_bw()
}

## Shelf water volume, 2 month lag: all months, summer, fall
plot_shw_lag2(df.shlfvol.sfob, "mean_v_lag2",
              'Mean shelf water volume', 'Shelf water volume - lag 2 months')
plot_shw_lag2(df.shlfvol.sfob %>% filter(month %in% c(7, 8, 9)), "mean_v_lag2",
              'Mean shelf water volume',
              'Shelf water volume summer - 2 month lag')
plot_shw_lag2(df.shlfvol.sfob %>% filter(month %in% c(10, 11, 12)), "mean_v_lag2",
              'Mean shelf water volume',
              'Shelf water volume fall - 2 month lag')

## Shelf water temperature and salinity, 2 month lag
plot_shw_lag2(df.shlfvol.sfob, "mean_t_lag2",
              'Mean shelf water temperature',
              'Shelf water temperature - lag 2 months')
plot_shw_lag2(df.shlfvol.sfob, "mean_s_lag2",
              'Mean shelf water salinity',
              'Shelf water salinity - lag 2 months')

3-month lags

## Shelf water volume 3 month lag
# CPUE vs shelf-water metrics lagged by 3 months, with a linear fit and R^2.
# `after_stat(rr.label)` replaces the `..rr.label..` dot-dot notation, which
# is deprecated in ggplot2 >= 3.4.0.
ggplot2::ggplot(df.shlfvol.sfob,
                aes(x=mean_cpue, y=mean_v_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Mean shelf water volume') +
  labs(title = 'Shelf water volume - lag 3 months') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

##Shelf water volume summer - 3 month lag
ggplot2::ggplot(df.shlfvol.sfob %>% filter(month %in% c(7,8,9)),
                aes(x=mean_cpue, y=mean_v_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Mean shelf water volume') +
  labs(title = 'Shelf water volume summer - 3 month lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

##Shelf water volume fall - 3 month lag
ggplot2::ggplot(df.shlfvol.sfob %>% filter(month %in% c(10,11,12)),
                aes(x=mean_cpue, y=mean_v_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Mean shelf water volume') +
  labs(title = 'Shelf water volume fall - 3 month lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## Shelf water temperature 3 month lag
ggplot2::ggplot(df.shlfvol.sfob,
                aes(x=mean_cpue, y=mean_t_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Mean shelf water temperature') +
  labs(title = 'Shelf water temperature - lag 3 months') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## Shelf water salinity 3 month lag
ggplot2::ggplot(df.shlfvol.sfob,
                aes(x=mean_cpue, y=mean_s_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Mean shelf water salinity') +
  labs(title = 'Shelf water salinity - lag 3 months') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

Correlations between sf/ob data and shelf water volume/temp/salinity

# Join monthly SF/OB CPUE with the unlagged shelf-water metrics and drop rows
# with any missing value so lm() and cor.test() see identical sample sizes.
# The `## ` lines below are knitted console output retained for the report.
shlfvol_nolag = dplyr::full_join(shlfvol.sfob, df.lag, by = join_by(year, month)) %>%
  dplyr::select(year, month, mean_cpue, mean_t, mean_s, mean_v, mean_tri, mean_sed) %>%
  tidyr::drop_na()
#shlfvol_nolag <- shlfvol_nolag[-c(11),] #removes outlier


# Shelf water volume regressed on CPUE (no lag); not significant (p = 0.245).
lm_shlf<-lm(mean_v ~ mean_cpue, data=shlfvol_nolag)
summary(lm_shlf)
## 
## Call:
## lm(formula = mean_v ~ mean_cpue, data = shlfvol_nolag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1541.4  -392.4  -103.3   550.7  1796.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3401.65     233.03  14.598   <2e-16 ***
## mean_cpue      69.45      59.10   1.175    0.245    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 806.4 on 51 degrees of freedom
## Multiple R-squared:  0.02636,    Adjusted R-squared:  0.007267 
## F-statistic: 1.381 on 1 and 51 DF,  p-value: 0.2454
#plot(mean_v ~ mean_cpue, data=df.shlfvol.sfob, pch=1, col="dodgerblue") + abline(lm_shlf)

cor.test(shlfvol_nolag$mean_cpue, shlfvol_nolag$mean_v, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  shlfvol_nolag$mean_cpue and shlfvol_nolag$mean_v
## t = 1.175, df = 51, p-value = 0.2454
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1128956  0.4144586
## sample estimates:
##       cor 
## 0.1623524
# ggscatter annotates Pearson's r (not R^2) via cor.coef = TRUE.
ggscatter(shlfvol_nolag, x = "mean_v", y = "mean_cpue", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean shelf water volume", ylab = "Monthly mean CPUE/hr", title="Shelf Water Volume")

# shelf water temperature
# Significant negative relationship between CPUE and shelf water temperature
# (p = 0.004, r = -0.39).
lm_shlf_t<-lm(mean_t ~ mean_cpue, data=shlfvol_nolag)
summary(lm_shlf_t)
## 
## Call:
## lm(formula = mean_t ~ mean_cpue, data = shlfvol_nolag)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.358 -4.379 -1.026  4.167  9.039 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  14.4736     1.3266  10.911 6.08e-15 ***
## mean_cpue    -1.0080     0.3365  -2.996  0.00422 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.591 on 51 degrees of freedom
## Multiple R-squared:  0.1497, Adjusted R-squared:  0.133 
## F-statistic: 8.976 on 1 and 51 DF,  p-value: 0.004216
#plot(mean_t ~ mean_cpue, data=shlfvol_nolag, pch=1, col="dodgerblue") + abline(lm_shlf_t)

cor.test(shlfvol_nolag$mean_cpue, shlfvol_nolag$mean_t, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  shlfvol_nolag$mean_cpue and shlfvol_nolag$mean_t
## t = -2.996, df = 51, p-value = 0.004216
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5949396 -0.1301732
## sample estimates:
##        cor 
## -0.3868555
ggscatter(shlfvol_nolag, x = "mean_t", y = "mean_cpue", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean shelf water temperature", ylab = "Monthly mean CPUE/hr", title="Shelf Water Temperature")

# shelf water salinity
# No relationship between CPUE and shelf water salinity (p = 0.998).
lm_shlf_s<-lm(mean_s ~ mean_cpue, data=shlfvol_nolag)
summary(lm_shlf_s)
## 
## Call:
## lm(formula = mean_s ~ mean_cpue, data = shlfvol_nolag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5784 -0.1686 -0.0186  0.1215  0.5615 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 32.9086526  0.0708229 464.661   <2e-16 ***
## mean_cpue   -0.0000467  0.0179629  -0.003    0.998    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2451 on 51 degrees of freedom
## Multiple R-squared:  1.325e-07,  Adjusted R-squared:  -0.01961 
## F-statistic: 6.759e-06 on 1 and 51 DF,  p-value: 0.9979
#plot(mean_s ~ mean_cpue, data=shlfvol_nolag, pch=1, col="dodgerblue") + abline(lm_shlf_s)

cor.test(shlfvol_nolag$mean_cpue, shlfvol_nolag$mean_s, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  shlfvol_nolag$mean_cpue and shlfvol_nolag$mean_s
## t = -0.0025998, df = 51, p-value = 0.9979
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2706312  0.2699563
## sample estimates:
##           cor 
## -0.0003640386
ggscatter(shlfvol_nolag, x = "mean_s", y = "mean_cpue", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean shelf water salinity", ylab = "Monthly mean CPUE/hr", title="Shelf Water Salinity")

Gulf-stream index

The Gulf Stream index was calculated based on the method presented by Pérez-Hernández and Joyce (2014). The Gulf Stream index (GSI) is a measure of the degrees latitude above the average Gulf Stream position, based on the position of the 15 °C isotherm at 200 m depth between 55°W and 75°W.

Positive values indicate that the mean position of the Gulf Stream is more northerly, whereas negative values indicate a more southerly position.

Test code for GSI/recruit year lags

# Monthly Gulf Stream index (Chen), 1954-2022.
gsi.m <- read.csv(here::here('data/gulf_stream_index/mm_gsi_1954_2022_chen.csv'))
# Annual GSI summaries over Mar-Aug joined to the recruitment series, with
# 1/2/3/6/7-year lags of the annual mean GSI and a Northerly/Southerly
# classification by sign of m.gsi.
# NOTE(review): lag() shifts by row, so this assumes the joined frame is
# ordered by year with no gaps -- confirm upstream ordering.
df <- dplyr::full_join(recruit, gsi.m %>%
                   group_by(year) %>% 
                   filter(month %in% c(3:8)) %>% 
                   summarise(m.gsi = mean(GSI),
                             sd.gsi = sd(GSI),
                             max.gsi = max(GSI), 
                             min.gsi = min(GSI)), 
                 by = join_by(year)) %>% 
  mutate(mean_gsi_lag1 = lag(m.gsi,1),
         mean_gsi_lag2 = lag(m.gsi,2),
         mean_gsi_lag3 = lag(m.gsi,3),
         mean_gsi_lag6 = lag(m.gsi,6),
         mean_gsi_lag7 = lag(m.gsi,7)) %>%
  mutate(pos = ifelse(m.gsi > 0, 'Northerly', 'Southerly'), 
         n.pos = ifelse(m.gsi > 0, 1, 0))
# NOTE(review): removing rows by hard-coded position is fragile if the input
# years change; consider filtering on year instead.
gsi_df <- df[-c(51:69),] 
gsi_df <- gsi_df[-c(13),] # removes recruitment outlier

## No lag
# GSI vs recruitment at lags of 0/1/2/3/6/7 years. Each section fits a
# simple linear model, reports the Pearson correlation, and plots the
# relationship. The `## ` lines are knitted console output kept in sync
# with the calls above them.
lm_rec_gsi<-lm(m.gsi ~ recruit_est, data=gsi_df)
summary(lm_rec_gsi)
## 
## Call:
## lm(formula = m.gsi ~ recruit_est, data = gsi_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.36934 -0.53872 -0.06646  0.55505  1.29036 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.188e-01  2.565e-01   0.853    0.398
## recruit_est 3.419e-08  1.642e-07   0.208    0.836
## 
## Residual standard error: 0.7216 on 47 degrees of freedom
## Multiple R-squared:  0.0009216,  Adjusted R-squared:  -0.02034 
## F-statistic: 0.04336 on 1 and 47 DF,  p-value: 0.836
cor.test(gsi_df$recruit_est, gsi_df$m.gsi, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_df$recruit_est and gsi_df$m.gsi
## t = 0.20822, df = 47, p-value = 0.836
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2529980  0.3089178
## sample estimates:
##        cor 
## 0.03035838
ggscatter(gsi_df, x = "m.gsi", y = "recruit_est",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Recruitment Estimate",
          title = "Recruitment No Lag")

## 1 year lag
lm_rec_gsi1<-lm(mean_gsi_lag1 ~ recruit_est, data=gsi_df)
summary(lm_rec_gsi1)
## 
## Call:
## lm(formula = mean_gsi_lag1 ~ recruit_est, data = gsi_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.41118 -0.56366  0.00751  0.53388  1.33505 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  5.520e-01  2.506e-01   2.203   0.0327 *
## recruit_est -2.067e-07  1.598e-07  -1.294   0.2023  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7016 on 46 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.0351, Adjusted R-squared:  0.01412 
## F-statistic: 1.673 on 1 and 46 DF,  p-value: 0.2023
cor.test(gsi_df$recruit_est, gsi_df$mean_gsi_lag1, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_df$recruit_est and gsi_df$mean_gsi_lag1
## t = -1.2935, df = 46, p-value = 0.2023
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4476486  0.1022342
## sample estimates:
##        cor 
## -0.1873425
ggscatter(gsi_df, x = "mean_gsi_lag1", y = "recruit_est",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Recruitment Estimate",
          title = "Recruitment 1 Year Lag")

## 2 year lag
# NOTE(review): the formula orientation flips here (recruit_est ~ lag) versus
# the other lag sections (lag ~ recruit_est). The p-value is the same either
# way for simple regression, but consider making the sections consistent.
lm_rec_gsi2<-lm(recruit_est ~ mean_gsi_lag2, data=gsi_df)
summary(lm_rec_gsi2)
## 
## Call:
## lm(formula = recruit_est ~ mean_gsi_lag2, data = gsi_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1027099  -484736  -132370   331238  1458567 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1479881      99031  14.944   <2e-16 ***
## mean_gsi_lag2  -175043     138650  -1.262    0.213    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 642300 on 45 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.03421,    Adjusted R-squared:  0.01275 
## F-statistic: 1.594 on 1 and 45 DF,  p-value: 0.2133
cor.test(gsi_df$recruit_est, gsi_df$mean_gsi_lag2, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_df$recruit_est and gsi_df$mean_gsi_lag2
## t = -1.2625, df = 45, p-value = 0.2133
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4483090  0.1079472
## sample estimates:
##       cor 
## -0.184953
ggscatter(gsi_df, x = "mean_gsi_lag2", y = "recruit_est",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Recruitment Estimate",
          title = "Recruitment 2 Year Lag")

## 3 year lag
lm_rec_gsi3<-lm(mean_gsi_lag3 ~ recruit_est, data=gsi_df)
summary(lm_rec_gsi3)
## 
## Call:
## lm(formula = mean_gsi_lag3 ~ recruit_est, data = gsi_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25846 -0.51294  0.02686  0.46400  1.32597 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  3.524e-01  2.492e-01   1.414    0.164
## recruit_est -1.088e-07  1.577e-07  -0.690    0.494
## 
## Residual standard error: 0.6912 on 44 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.01069,    Adjusted R-squared:  -0.01179 
## F-statistic: 0.4756 on 1 and 44 DF,  p-value: 0.494
cor.test(gsi_df$recruit_est, gsi_df$mean_gsi_lag3, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_df$recruit_est and gsi_df$mean_gsi_lag3
## t = -0.68965, df = 44, p-value = 0.494
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3822346  0.1926708
## sample estimates:
##        cor 
## -0.1034113
ggscatter(gsi_df, x = "mean_gsi_lag3", y = "recruit_est",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Recruitment Estimate",
          title = "Recruitment 3 Year Lag")

## 6 year lag
# Strongest (marginal) signal of the set: p = 0.081, r = 0.27.
lm_rec_gsi6<-lm(mean_gsi_lag6 ~ recruit_est, data=gsi_df)
summary(lm_rec_gsi6)
## 
## Call:
## lm(formula = mean_gsi_lag6 ~ recruit_est, data = gsi_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.23331 -0.49821  0.00945  0.35327  1.30887 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -2.582e-01  2.319e-01  -1.113   0.2721  
## recruit_est  2.640e-07  1.474e-07   1.791   0.0807 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6391 on 41 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.07255,    Adjusted R-squared:  0.04993 
## F-statistic: 3.207 on 1 and 41 DF,  p-value: 0.0807
cor.test(gsi_df$recruit_est, gsi_df$mean_gsi_lag6, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_df$recruit_est and gsi_df$mean_gsi_lag6
## t = 1.7909, df = 41, p-value = 0.0807
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03372076  0.52705708
## sample estimates:
##      cor 
## 0.269351
ggscatter(gsi_df, x = "mean_gsi_lag6", y = "recruit_est",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Recruitment Estimate",
          title = "Recruitment 6 Year Lag")

## 7 year lag
lm_rec_gsi7<-lm(mean_gsi_lag7 ~ recruit_est, data=gsi_df)
summary(lm_rec_gsi7)
## 
## Call:
## lm(formula = mean_gsi_lag7 ~ recruit_est, data = gsi_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1718 -0.4398 -0.0091  0.3867  1.3412 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.069e-02  2.379e-01  -0.255    0.800
## recruit_est  1.026e-07  1.499e-07   0.684    0.498
## 
## Residual standard error: 0.6424 on 40 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.01157,    Adjusted R-squared:  -0.01314 
## F-statistic: 0.4684 on 1 and 40 DF,  p-value: 0.4977
cor.test(gsi_df$recruit_est, gsi_df$mean_gsi_lag7, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_df$recruit_est and gsi_df$mean_gsi_lag7
## t = 0.68438, df = 40, p-value = 0.4977
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2029868  0.3984837
## sample estimates:
##       cor 
## 0.1075818
ggscatter(gsi_df, x = "mean_gsi_lag7", y = "recruit_est",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Recruitment Estimate",
          title = "Recruitment 7 Year Lag")

CPUE/Year lags

# Annual GSI (all months) joined to annual SF/OB catch, with 1/2/3-year lags
# of the mean GSI and a Northerly/Southerly classification by sign.
sfob_yr<- dplyr::full_join(catch_recruit, gsi.m %>%
                   group_by(year) %>% 
                   summarise(m.gsi = mean(GSI),
                             sd.gsi = sd(GSI),
                             max.gsi = max(GSI), 
                             min.gsi = min(GSI)), 
                 by = join_by(year)) %>% 
  mutate(mean_gsi_lag1 = lag(m.gsi,1),
         mean_gsi_lag2 = lag(m.gsi,2),
         mean_gsi_lag3 = lag(m.gsi,3)) %>%
  mutate(pos = ifelse(m.gsi > 0, 'Northerly', 'Southerly'), 
         n.pos = ifelse(m.gsi > 0, 1, 0))
# NOTE(review): positional row removal is fragile if the joined data change;
# filtering by year would be safer.
gsi_yr <- sfob_yr[-c(20:21,24:69),] #removes years not in study fleet and 2 outliers

## No lag
# Annual GSI vs total annual SF/OB catch at lags of 0/1/2/3 years.
# The `## ` lines are knitted console output kept in sync with the calls.
lm_cpue_gsi<-lm(m.gsi ~ ttl_sum, data=gsi_yr)
summary(lm_cpue_gsi)
## 
## Call:
## lm(formula = m.gsi ~ ttl_sum, data = gsi_yr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1295 -0.5937  0.1162  0.4767  0.9813 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2746646  0.2449066   1.122    0.276
## ttl_sum     0.0001985  0.0001557   1.275    0.218
## 
## Residual standard error: 0.6701 on 19 degrees of freedom
## Multiple R-squared:  0.07878,    Adjusted R-squared:  0.0303 
## F-statistic: 1.625 on 1 and 19 DF,  p-value: 0.2178
cor.test(gsi_yr$ttl_sum, gsi_yr$m.gsi, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_yr$ttl_sum and gsi_yr$m.gsi
## t = 1.2747, df = 19, p-value = 0.2178
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1718200  0.6353842
## sample estimates:
##       cor 
## 0.2806859
ggscatter(gsi_yr, x = "m.gsi", y = "ttl_sum",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Total CPUE/year",
          title = "SFOB CPUE No Lag")

## 1 year lag
# Marginal positive relationship (p = 0.070, r = 0.41).
lm_cpue_gsi1<-lm(mean_gsi_lag1 ~ ttl_sum, data=gsi_yr)
summary(lm_cpue_gsi1)
## 
## Call:
## lm(formula = mean_gsi_lag1 ~ ttl_sum, data = gsi_yr)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.39351 -0.43190 -0.09479  0.51141  0.92795 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 0.1119394  0.2355120   0.475   0.6403  
## ttl_sum     0.0002818  0.0001465   1.924   0.0703 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6198 on 18 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1706, Adjusted R-squared:  0.1245 
## F-statistic: 3.702 on 1 and 18 DF,  p-value: 0.07032
cor.test(gsi_yr$ttl_sum, gsi_yr$mean_gsi_lag1, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_yr$ttl_sum and gsi_yr$mean_gsi_lag1
## t = 1.924, df = 18, p-value = 0.07032
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03612372  0.72332456
## sample estimates:
##       cor 
## 0.4129991
ggscatter(gsi_yr, x = "mean_gsi_lag1", y = "ttl_sum",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Total CPUE/year",
          title = "SFOB CPUE 1 Year Lag")

## 2 year lag
# Marginal positive relationship (p = 0.065, r = 0.43).
lm_cpue_gsi2<-lm(mean_gsi_lag2 ~ ttl_sum, data=gsi_yr)
summary(lm_cpue_gsi2)
## 
## Call:
## lm(formula = mean_gsi_lag2 ~ ttl_sum, data = gsi_yr)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.14663 -0.43408  0.05566  0.37488  0.91261 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 0.026435   0.230986   0.114   0.9102  
## ttl_sum     0.000276   0.000140   1.971   0.0652 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5627 on 17 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.186,  Adjusted R-squared:  0.1381 
## F-statistic: 3.885 on 1 and 17 DF,  p-value: 0.06523
cor.test(gsi_yr$ttl_sum, gsi_yr$mean_gsi_lag2, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_yr$ttl_sum and gsi_yr$mean_gsi_lag2
## t = 1.9709, df = 17, p-value = 0.06523
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02851206  0.74044431
## sample estimates:
##       cor 
## 0.4312825
ggscatter(gsi_yr, x = "mean_gsi_lag2", y = "ttl_sum",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Total CPUE/year",
          title = "SFOB CPUE 2 Year Lag")

## 3 year lag
lm_cpue_gsi3<-lm(mean_gsi_lag3 ~ ttl_sum, data=gsi_yr)
summary(lm_cpue_gsi3)
## 
## Call:
## lm(formula = mean_gsi_lag3 ~ ttl_sum, data = gsi_yr)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.02617 -0.43508 -0.03968  0.38423  1.17224 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  5.069e-01  2.822e-01   1.797   0.0913 .
## ttl_sum     -9.042e-05  1.665e-04  -0.543   0.5946  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6324 on 16 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.0181, Adjusted R-squared:  -0.04327 
## F-statistic: 0.2949 on 1 and 16 DF,  p-value: 0.5946
cor.test(gsi_yr$ttl_sum, gsi_yr$mean_gsi_lag3, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi_yr$ttl_sum and gsi_yr$mean_gsi_lag3
## t = -0.54305, df = 16, p-value = 0.5946
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5658579  0.3546148
## sample estimates:
##        cor 
## -0.1345276
ggscatter(gsi_yr, x = "mean_gsi_lag3", y = "ttl_sum",
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Yearly GSI", ylab = "Total CPUE/year",
          title = "SFOB CPUE 3 Year Lag")

#gsi.m <- read.csv(here::here('C:/Users/stephanie.owen/Documents/tilefish_indicators/gulf_stream_index/mm_gsi_1954_2022_chen.csv'))

#subset only GSI, month, year and 1997>
#gsi<-gsi.m[c("GSI","year","month")] %>%
 # filter(year > 1997)
#write.csv(gsi, "C:/Users/stephanie.owen/Documents/tilefish_indicators/gulf_stream_index/gsi.csv", row.names=FALSE)

#filter sfob.env 
#gsi.sfob <- sfob.env %>% filter(depth > 50) %>%
 # group_by(year,month) %>% 
  #summarise(mean_cpue = mean(cpue_hr))
#write.csv(gsi.sfob, "C:/Users/stephanie.owen/Documents/tilefish_indicators/gulf_stream_index/gsi_sfob.csv", row.names=FALSE)

#new df wouldn't recognize month when joining the gsi and sfob data so I did it manually 
# Use a project-relative path: passing an absolute, user-specific path to
# here::here() defeats its purpose and breaks on any other machine. This
# matches the pattern used for the other data reads in this file.
gsi <- read.csv(here::here('data/gulf_stream_index/gsi_sfob.csv'))

# Keep positive-CPUE months and drop one high outlier (>= 15).
gsi_nozero <- filter(gsi, mean_cpue > 0, mean_cpue<15) #removes outlier

# Classify Gulf Stream position by the sign of the monthly index.
gsi.sfob<-mutate(gsi_nozero, pos = ifelse(GSI > 0, 'Northerly', 'Southerly'), 
         n.pos = ifelse(GSI > 0, 1, 0))

GSI by Month and Season

# GSI all months (1998-2022)
# Monthly CPUE vs GSI, faceted by calendar month; dashed line marks the
# mean (zero-anomaly) Gulf Stream position.
ggplot(gsi.sfob ,aes(x=mean_cpue, y=GSI)) + 
  geom_point(color = 'black') +
  facet_wrap(~month)+
  xlab('Mean CPUE/hr') +
  ylab('Gulf stream position anomaly') +
  labs(title = 'Gulf stream index by month') +
  geom_hline(yintercept = 0, lty = 2) +
  theme_bw()

#GSI Jan-Apr
# Title fixed: the filter keeps months 1-4, so the label is Jan:Apr.
filter(gsi.sfob, month %in% c(1:4)) %>%
ggplot(.,aes(x=mean_cpue, y=GSI)) + 
 geom_point(color = 'black') +
  xlab('Mean CPUE/hr') +
  ylab('Gulf stream position anomaly') +
  labs(title = 'Gulf stream index (Jan:Apr)') +
  geom_hline(yintercept = 0, lty = 2) +
  theme_bw()

#GSI Sept-Dec
# Title fixed: the filter keeps months 9-12, so the label is Sep:Dec
# (previously mislabeled as Apr:Jul).
filter(gsi.sfob, month %in% c(9:12)) %>%
ggplot(.,aes(x=mean_cpue, y=GSI)) + 
  geom_point(color = 'black') +
  xlab('Mean CPUE/hr') +
  ylab('Gulf stream position anomaly') +
  labs(title = 'Gulf stream index (Sep:Dec)') +
  geom_hline(yintercept = 0, lty = 2) +
  theme_bw()

# All months
# Overlay of candidate smoothers (linear, quadratic, loess, GAM with default
# and k = 3 basis) for visual model comparison of CPUE vs GSI.
# `linewidth` replaces the `size` aesthetic for lines, deprecated in
# ggplot2 3.4.0.
ggplot(gsi.sfob, aes(x=mean_cpue, y=GSI)) + 
  geom_point(color = 'black') +
  geom_hline(yintercept = 0, lty = 2)+
  labs(title = 'All months') +
  xlab('Mean CPUE/hr') +
  ylab('Gulf stream position anomaly') +
  geom_smooth(method = "lm", formula = y ~ x, linewidth = 1, se = FALSE,
              aes(colour = 'Linear')) + 
  geom_smooth(method = "lm", formula = y ~ x + I(x^2),
              linewidth = 1, se = FALSE, aes(colour = 'Quadratic')) + 
  geom_smooth(method = "loess", formula = y ~ x, 
              linewidth = 1, se = FALSE, aes(colour = 'Loess')) + 
  geom_smooth(method = "gam", formula = y ~  s(x), 
              linewidth = 1, se = FALSE, aes(colour = 'Gam')) + 
  geom_smooth(method = "gam", formula = y ~ s(x, k = 3),
              linewidth = 1, se = FALSE, aes(colour = 'Gam2')) +
  scale_color_manual(name='Model',
                     breaks=c('Linear', 'Quadratic', 'Loess', 'Gam', 'Gam2'),
                     values=c('Linear'='black', 'Quadratic'='blue', 
                              'Loess'='red', 'Gam' = 'green',
                              'Gam2' = 'purple')) +
  theme_bw()

#ANOVA
# Two-way ANOVA of monthly CPUE on month and Gulf Stream position class.
# NOTE(review): `month` enters as a numeric covariate (1 df), not a factor;
# confirm that a linear month effect is intended rather than a seasonal one.
pos_aov_cpue <- aov(mean_cpue ~ month * pos,
              data = gsi.sfob)

# look at effects and interactions
summary(pos_aov_cpue)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## month         1   33.9   33.95   4.189 0.0425 *
## pos           1   35.3   35.34   4.361 0.0386 *
## month:pos     1    4.6    4.58   0.566 0.4533  
## Residuals   142 1150.8    8.10                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tidy the ANOVA table for downstream use/reporting.
tidy_pos_aov_cpue <- broom::tidy(pos_aov_cpue)
tidy_pos_aov_cpue
## # A tibble: 4 × 6
##   term         df   sumsq meansq statistic p.value
##   <chr>     <dbl>   <dbl>  <dbl>     <dbl>   <dbl>
## 1 month         1   33.9   33.9      4.19   0.0425
## 2 pos           1   35.3   35.3      4.36   0.0386
## 3 month:pos     1    4.58   4.58     0.566  0.453 
## 4 Residuals   142 1151.     8.10    NA     NA
# Bars of GSI anomaly by CPUE, filled by Northerly/Southerly class and
# dodged within year groups.
ggplot(data = gsi.sfob, 
             aes(x = mean_cpue, y = GSI, fill = pos,  
                 group = year)) +
   geom_bar(color = "black", stat = "identity", 
            position = position_dodge2(preserve = "single"), width = 20) +
  theme_bw() +
  labs(title = '1998-2022',
       x = "Mean CPUE/hr", 
       y = "Gulf stream position anomaly")

Correlations and GAM

#correlations gsi and cpue
# Monthly GSI regressed on CPUE: significant negative relationship
# (p = 0.002, r = -0.25). The `## ` lines are knitted console output.
lm_gsi<-lm(GSI ~ mean_cpue, data=gsi.sfob)
summary(lm_gsi)
## 
## Call:
## lm(formula = GSI ~ mean_cpue, data = gsi.sfob)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1585 -0.4956  0.1054  0.5892  1.3352 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.84606    0.07917  10.687  < 2e-16 ***
## mean_cpue   -0.06567    0.02099  -3.129  0.00212 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7345 on 144 degrees of freedom
## Multiple R-squared:  0.06366,    Adjusted R-squared:  0.05716 
## F-statistic:  9.79 on 1 and 144 DF,  p-value: 0.002124
#plot(GSI ~ mean_cpue, data=gsi.sfob, pch=1, col="dodgerblue") + abline(lm_gsi)

cor.test(gsi.sfob$mean_cpue, gsi.sfob$GSI, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  gsi.sfob$mean_cpue and gsi.sfob$GSI
## t = -3.1289, df = 144, p-value = 0.002124
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.39842270 -0.09369611
## sample estimates:
##        cor 
## -0.2523042
ggscatter(gsi.sfob, x = "GSI", y = "mean_cpue", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean Monthly GSI", ylab = "Monthly mean CPUE/hr",
          title = "Gulf stream index")

Food availability

Larval tilefish eat zooplankton, likely Calanus. Calanus finmarchicus is a copepod (crustacean) with a one-year life cycle and an important food source for many commercially important species. Calanus spp. are lipid-rich, herbivorous species that eat phytoplankton, diatoms in particular (Hobbs et al. 2020).

Diatoms are often represented as microplankton (>20 µm), but many species are of the nanoplankton size class (2-20 µm), and a smaller few may even overlap with the picoplankton size class (<2 µm).

Microplankton
# microplankton
# Monthly microplankton time series aggregated over the golden tilefish strata.
micro<-read.csv(here::here('data/phyto_size_class/microplankton_ts_gtf_strata.csv'))
Microplankton maps
Microplankton analysis
# microplankton by month with lag
# Add 2-, 3-, and 6-month lags of the weighted mean microplankton series.
# NOTE(review): lag() here shifts by row position with no order_by — this
# assumes `micro` is already sorted by year/month with no missing months;
# confirm upstream ordering.
micro.lag <- micro %>%
  mutate(mean_micro_lag2 = lag(weighted_mean_micro,2),
         mean_micro_lag3 = lag(weighted_mean_micro,3),
         mean_micro_lag6 = lag(weighted_mean_micro,6))

# filter sfob.env 
# Monthly mean CPUE from the observer data: depth > 50 m, strictly positive
# CPUE, and CPUE < 15 (trims extreme values).
sfob <- sfob.env %>% filter(depth > 50, cpue_hr > 0, cpue_hr<15) %>%
  group_by(year,month) %>% 
  summarise(mean_cpue = mean(cpue_hr)) 
         
# Join with sf/ob data
# full_join followed by drop_na() retains only year/month rows that are
# present and complete in both series.
micro.sfob <- dplyr::full_join(micro.lag, sfob, by = join_by(year, month)) %>%
  dplyr::select(year, month, mean_cpue, mean_micro, weighted_mean_micro, mean_micro_lag2, mean_micro_lag3, mean_micro_lag6) %>%
  tidyr::drop_na()


# Scatter plots of monthly mean CPUE vs. weighted mean microplankton, with a
# linear fit and R^2 annotation (ggpubr::stat_cor), for no lag, seasonal
# subsets, and 2/3/6-month lags.
# Fix: the `..rr.label..` dot-dot notation is deprecated in ggplot2 >= 3.4;
# replaced throughout with after_stat(rr.label).

## Microplankton no lag
ggplot2::ggplot(micro.sfob,
                aes(x=mean_cpue, y=weighted_mean_micro)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean Microplankton') +
  labs(title = 'Mean Microplankton no lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## Microplankton summer (Jul-Sep)
ggplot2::ggplot(micro.sfob %>% filter(month %in% c(7,8,9)),
                aes(x=mean_cpue, y=weighted_mean_micro)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean Microplankton') +
  labs(title = 'Summer Mean Microplankton') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## Microplankton spring (Apr-Jun)
ggplot2::ggplot(micro.sfob %>% filter(month %in% c(4,5,6)),
                aes(x=mean_cpue, y=weighted_mean_micro)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean Microplankton') +
  labs(title = 'Spring Mean Microplankton') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## 2 month lag
ggplot2::ggplot(micro.sfob,
                aes(x=mean_cpue, y=mean_micro_lag2)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean Microplankton') +
  labs(title = 'Mean Microplankton 2 month lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## 3 month lag
ggplot2::ggplot(micro.sfob,
                aes(x=mean_cpue, y=mean_micro_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean Microplankton') +
  labs(title = 'Mean Microplankton 3 month lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## 6 month lag
ggplot2::ggplot(micro.sfob,
                aes(x=mean_cpue, y=mean_micro_lag6)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean Microplankton') +
  labs(title = 'Mean Microplankton 6 month lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

Micro correlation and GAM

# Correlation and plot
# Linear regression of weighted mean microplankton on monthly mean CPUE;
# output below shows a significant positive slope (p ~ 3e-05), R^2 ~ 0.10.
lm_micro<-lm(weighted_mean_micro ~ mean_cpue, data=micro.sfob)
summary(lm_micro)
## 
## Call:
## lm(formula = weighted_mean_micro ~ mean_cpue, data = micro.sfob)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36771 -0.12189 -0.01156  0.07468  0.78481 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.225251   0.025270   8.914 9.85e-16 ***
## mean_cpue   0.026512   0.006153   4.309 2.84e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.166 on 162 degrees of freedom
## Multiple R-squared:  0.1028, Adjusted R-squared:  0.09727 
## F-statistic: 18.56 on 1 and 162 DF,  p-value: 2.842e-05
#plot(weighted_mean_micro ~ mean_cpue, data=micro.sfob, pch=1, col="dodgerblue") + abline(lm_micro)

# Pearson correlation of the same pair; r ~ 0.32.
cor.test(micro.sfob$mean_cpue, micro.sfob$weighted_mean_micro, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  micro.sfob$mean_cpue and micro.sfob$weighted_mean_micro
## t = 4.3085, df = 162, p-value = 2.842e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1760358 0.4516907
## sample estimates:
##       cor 
## 0.3206357
# Scatter plot with regression line, CI band, and Pearson r annotation.
# Note the axes are flipped relative to lm_micro: microplankton on x here.
ggscatter(micro.sfob, x = "weighted_mean_micro", y = "mean_cpue", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Weighted mean microplankton", ylab = "Monthly mean CPUE/hr",
          title = "Microplankton")

Chlorophyll-A
# CHL-A
# Monthly chlorophyll-a time series aggregated over the golden tilefish strata.
chl<-read.csv(here::here('data/chl/chl_ts_gtf_strata.csv'))
Maps (Chlorophyll-A)
Chlorophyll-A analysis
# chl by month with lag
# Add 2-, 3-, and 6-month lags of the weighted mean chlorophyll-a series.
# NOTE(review): lag() shifts by row position with no order_by — assumes `chl`
# is sorted by year/month with no gaps; confirm upstream ordering.
chl.lag <- chl %>%
  mutate(mean_chl_lag2 = lag(weighted_mean_chl,2),
         mean_chl_lag3 = lag(weighted_mean_chl,3),
         mean_chl_lag6 = lag(weighted_mean_chl,6))

# filter sfob.env 
# Same monthly CPUE aggregation used for the microplankton analysis above:
# depth > 50 m, positive CPUE, CPUE < 15.
sfob <- sfob.env %>% filter(depth > 50, cpue_hr > 0, cpue_hr<15) %>%
  group_by(year,month) %>% 
  summarise(mean_cpue = mean(cpue_hr)) 
         
# Join with sf/ob data
# full_join followed by drop_na() retains only complete year/month rows.
chl.sfob <- dplyr::full_join(chl.lag, sfob, by = join_by(year, month)) %>%
  dplyr::select(year, month, mean_cpue, mean_chl, weighted_mean_chl, mean_chl_lag2, mean_chl_lag3, mean_chl_lag6) %>%
  tidyr::drop_na()


# Scatter plots of monthly mean CPUE vs. weighted mean chlorophyll-a, with a
# linear fit and R^2 annotation, for no lag, seasonal subsets, and 2/3/6-month
# lags.
# Fix: the `..rr.label..` dot-dot notation is deprecated in ggplot2 >= 3.4;
# replaced throughout with after_stat(rr.label).

## CHL-a no lag
ggplot2::ggplot(chl.sfob,
                aes(x=mean_cpue, y=weighted_mean_chl)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean CHL-a') +
  labs(title = 'Mean CHL-a no lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## CHL summer (Jul-Sep)
ggplot2::ggplot(chl.sfob %>% filter(month %in% c(7,8,9)),
                aes(x=mean_cpue, y=weighted_mean_chl)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean CHL-a') +
  labs(title = 'Summer Mean CHL-a') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## CHL fall (Oct-Dec)
ggplot2::ggplot(chl.sfob %>% filter(month %in% c(10,11,12)),
                aes(x=mean_cpue, y=weighted_mean_chl)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean CHL-a') +
  labs(title = 'Fall Mean CHL-a') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## 2 month lag
ggplot2::ggplot(chl.sfob,
                aes(x=mean_cpue, y=mean_chl_lag2)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean CHL-a') +
  labs(title = 'Mean CHL-a 2 month lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## 3 month lag
ggplot2::ggplot(chl.sfob,
                aes(x=mean_cpue, y=mean_chl_lag3)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean CHL-a') +
  labs(title = 'Mean CHL-a 3 month lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## 6 month lag
ggplot2::ggplot(chl.sfob,
                aes(x=mean_cpue, y=mean_chl_lag6)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean CHL-a') +
  labs(title = 'Mean CHL-a 6 month lag') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

CHL-a and CPUE correlation

# Linear regression of weighted mean CHL-a on monthly mean CPUE; output below
# shows a significant positive slope (p ~ 1e-04), R^2 ~ 0.09.
lm_chl<-lm(weighted_mean_chl ~ mean_cpue, data=chl.sfob)
summary(lm_chl)
## 
## Call:
## lm(formula = weighted_mean_chl ~ mean_cpue, data = chl.sfob)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49433 -0.14471 -0.02389  0.11336  1.16751 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.560105   0.035261  15.885  < 2e-16 ***
## mean_cpue   0.033625   0.008586   3.916 0.000132 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2316 on 162 degrees of freedom
## Multiple R-squared:  0.08648,    Adjusted R-squared:  0.08084 
## F-statistic: 15.34 on 1 and 162 DF,  p-value: 0.0001323
#plot(weighted_mean_chl ~ mean_cpue, data=chl.sfob, pch=1, col="dodgerblue") + abline(lm_chl)

# Pearson correlation of the same pair; r ~ 0.29.
cor.test(chl.sfob$mean_cpue, chl.sfob$weighted_mean_chl, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  chl.sfob$mean_cpue and chl.sfob$weighted_mean_chl
## t = 3.9162, df = 162, p-value = 0.0001323
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1474754 0.4280382
## sample estimates:
##       cor 
## 0.2940789
# Scatter plot with regression line, CI band, and Pearson r annotation.
# Note the axes are flipped relative to lm_chl: CHL-a on x here.
ggscatter(chl.sfob, x = "weighted_mean_chl", y = "mean_cpue", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Weighted mean CHL-a", ylab = "Monthly mean CPUE/hr",
          title = "CHL-a")

SST Fronts
# SST fronts
# Seasonal SST frontal-probability time series at three spatial aggregations.
fprob<-read.csv(here::here('data/sst_fronts/fprob_seasonal_ts_gtf_3x3.csv')) #fprob across all individual strata
fprob.ind<-read.csv(here::here('data/sst_fronts/fprob_seasonal_ts_gtf_indv_substrata_3x3.csv')) #mean for each of 14 substrata
# NOTE(review): fprob.ns is re-assigned later in the file from
# 'data/sst_fronts/fprob_ns.csv', so this read is effectively unused — confirm
# which source is intended.
fprob.ns<-read.csv(here::here('data/sst_fronts/fprob_seasonal_ts_gtf_n_v_s_substrata_3x3.csv')) #mean for substrata N and S of Hudson canyon
Maps (SST fronts)
SST Fronts analysis

FPROB across all individual strata

#sfob by season
# Seasonal mean CPUE pre-aggregated from the observer data.
sfob_season<-read.csv(here::here('data/sst_fronts/sfob_byseason.csv'))

sfob_season <- sfob_season %>% filter (mean_cpue<30) #removes outlier

         
#join with fprob
# full_join followed by drop_na() keeps only year/season rows that are
# present and complete in both series.
fprob.sfob <- dplyr::full_join(fprob, sfob_season, by = join_by(year, season)) %>%
  dplyr::select(year, season, mean_cpue, mean_fprob, weighted_mean_fprob) %>%
  tidyr::drop_na()

# Scatter plots of seasonal mean CPUE vs. weighted mean SST frontal
# probability, with a linear fit and R^2 annotation, overall and by season.
# Fix: the `..rr.label..` dot-dot notation is deprecated in ggplot2 >= 3.4;
# replaced throughout with after_stat(rr.label).

## All months
ggplot2::ggplot(fprob.sfob,
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Mean SST frontal probability') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## Winter
ggplot2::ggplot(fprob.sfob %>% filter(season == "winter") ,
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Winter Mean SST frontal probability') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## Spring
ggplot2::ggplot(fprob.sfob %>% filter(season == "spring"),
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Spring Mean SST frontal probability') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## Summer
ggplot2::ggplot(fprob.sfob %>% filter(season == "summer"),
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Summer Mean SST frontal probability') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## Fall
ggplot2::ggplot(fprob.sfob %>% filter(season == "fall"),
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Fall Mean SST frontal probability') +
  stat_cor(aes(label = after_stat(rr.label))) +
  theme_bw()

## Correlation and GAM
# Linear regression of seasonal mean CPUE on weighted mean frontal
# probability; output below shows no significant relationship (p ~ 0.56).
lm_fprob<-lm(mean_cpue ~ weighted_mean_fprob, data=fprob.sfob)
summary(lm_fprob)
## 
## Call:
## lm(formula = mean_cpue ~ weighted_mean_fprob, data = fprob.sfob)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.2324 -3.2464 -0.9886  1.8721 13.5380 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)           11.195      8.233   1.360    0.178
## weighted_mean_fprob   -6.180     10.534  -0.587    0.559
## 
## Residual standard error: 4.65 on 75 degrees of freedom
## Multiple R-squared:  0.004568,   Adjusted R-squared:  -0.008704 
## F-statistic: 0.3442 on 1 and 75 DF,  p-value: 0.5592
#plot(weighted_mean_fprob ~ mean_cpue, data=fprob.sfob, pch=1, col="dodgerblue") + abline(lm_fprob)

# Pearson correlation of the same pair; r ~ -0.07, not significant.
cor.test(fprob.sfob$mean_cpue, fprob.sfob$weighted_mean_fprob, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  fprob.sfob$mean_cpue and fprob.sfob$weighted_mean_fprob
## t = -0.58666, df = 75, p-value = 0.5592
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2872176  0.1587960
## sample estimates:
##         cor 
## -0.06758665
# Scatter plot with regression line, CI band, and Pearson r annotation.
ggscatter(fprob.sfob, x = "mean_cpue", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.coeff.args = list(method="pearson"),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean CPUE/hr", ylab = "Mean Seasonal SST Frontal probability")

# Exploratory GAM left disabled (mgcv::gam of CPUE on a smooth of fprob).
#fprob_gam <- gam(mean_cpue ~ s(weighted_mean_fprob), data=fprob.sfob, method="REML")
#gam.check(fprob_gam)
#summary(fprob_gam)

#draw(fprob_gam, residuals=TRUE)

FPROB of each substrata

#join with sfob
# Per-substratum frontal probability joined to seasonal CPUE; full_join +
# drop_na() keeps only year/season/substratum rows complete in both.
ind.sfob <- dplyr::full_join(fprob.ind, sfob_season, by = join_by(year, season)) %>%
  dplyr::select(year, season, id, mean_cpue, mean_fprob, weighted_mean_fprob) %>%
  tidyr::drop_na()

# fprob by substrata
# Faceted scatter plots (one panel per substratum id) with linear fits and
# R^2 labels from ggpmisc::stat_poly_eq.
# Fixes: `..rr.label..` is deprecated in ggplot2 >= 3.4, replaced with
# after_stat(rr.label); the single-argument paste(..., sep = "~~~") wrapper
# was a no-op and has been dropped.
# NOTE(review): label.x.npc is expected in [0, 1] npc units (or a keyword);
# the value 30 likely places labels off-panel — confirm intended placement.
ggplot2::ggplot(ind.sfob,
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Mean SST frontal probability') +
  stat_poly_eq(aes(label = after_stat(rr.label)),
               label.x.npc = 30, label.y.npc = 0.15,
               parse = TRUE, size = 3)  +
  theme_bw() +
  facet_wrap(~id) +
  theme_facet()

## Winter
ggplot2::ggplot(ind.sfob %>% filter(season == "winter") ,
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Winter Mean SST frontal probability') +
  stat_poly_eq(aes(label = after_stat(rr.label)),
               label.x.npc = 30, label.y.npc = 0.15,
               parse = TRUE, size = 3)  +
  theme_bw() +
  facet_wrap(~id) +
  theme_facet()

## Spring
ggplot2::ggplot(ind.sfob %>% filter(season == "spring") ,
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Spring Mean SST frontal probability') +
  stat_poly_eq(aes(label = after_stat(rr.label)),
               label.x.npc = 30, label.y.npc = 0.15,
               parse = TRUE, size = 3)  +
  theme_bw() +
  facet_wrap(~id) +
  theme_facet()

## Summer
ggplot2::ggplot(ind.sfob %>% filter(season == "summer") ,
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Summer Mean SST frontal probability') +
  stat_poly_eq(aes(label = after_stat(rr.label)),
               label.x.npc = 30, label.y.npc = 0.15,
               parse = TRUE, size = 3)  +
  theme_bw() +
  facet_wrap(~id) +
  theme_facet()

## Fall
ggplot2::ggplot(ind.sfob %>% filter(season == "fall") ,
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Fall Mean SST frontal probability') +
  stat_poly_eq(aes(label = after_stat(rr.label)),
               label.x.npc = 30, label.y.npc = 0.15,
               parse = TRUE, size = 3)  +
  theme_bw() +
  facet_wrap(~id) +
  theme_facet()

## Scatter plot with correlations
#plot cuts off p values unless you open it in a new window and save 
ggscatter(ind.sfob, x = "mean_cpue", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean CPUE/hr", ylab = "Mean SST frontal probability") +
  facet_wrap(~id) +
  theme_facet()

N/S substrata

# Frontal probability for the substrata north vs. south of Hudson Canyon,
# pre-joined with seasonal CPUE; drop rows where mean_cpue is NA.
# (Note: this overwrites the fprob.ns read earlier in the file.)
fprob_ns<-read.csv(here::here('data/sst_fronts/fprob_ns.csv'))
fprob.ns<-fprob_ns[!(is.na(fprob_ns$mean_cpue)), ]

# fprob by N/S Hudson Canyon
# Fix: `..rr.label..` is deprecated in ggplot2 >= 3.4, replaced with
# after_stat(rr.label); the single-argument paste() wrapper was a no-op.
# NOTE(review): label.x.npc is expected in [0, 1] npc units (or a keyword);
# the value 10 likely places labels off-panel — confirm intended placement.
ggplot2::ggplot(fprob.ns,
                aes(x=mean_cpue, y=weighted_mean_fprob)) + 
  geom_point(color = 'black') +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth") +
  xlab('Mean CPUE/hr') +
  ylab('Weighted mean SST frontal probability') +
  labs(title = 'Mean SST frontal probability') +
   stat_poly_eq(aes(label = after_stat(rr.label)),
              label.x.npc = 10,
               parse = TRUE, size = 5)  +
  theme_bw() +
  facet_wrap(~substrat) +
  theme_facet()

## Scatter plot with correlations
ggscatter(fprob.ns, x = "mean_cpue", y = "weighted_mean_fprob", 
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.coeff.args = list(method="pearson", 
              label.x=20, label.y=0.6, size =3),
          add.params=list(color="dodgerblue", fill="lightgray"),
          xlab = "Mean CPUE/hr", ylab = "Mean SST frontal probability") +
  facet_wrap(~substrat) +
  theme_facet()

Multivariable GAMs
## habitat (sst/bt/sal?)
# Multivariable habitat GAM: monthly mean CPUE modeled on smooths of SST and
# bottom temperature plus linear terrain (tri) and sediment (sed) terms.
# `sst.sfob` and `df.lag.sfob` are built earlier in the file.
habitat <- dplyr::full_join(
  sst.sfob, df.lag.sfob, by = join_by(year, month, mean_cpue, mean_tri, mean_sed)) %>%
  dplyr::select(year, month, mean_cpue, weighted_mean_sst, mean_bt, mean_tri, mean_sed)
# NOTE(review): dropping rows by position is fragile — if the upstream join or
# sort order changes, the wrong rows are removed. A filter on year (e.g.
# year >= 2008, per the comment) would be safer; confirm the intended cutoff.
habitat <- habitat[-c(1:22),] #removes year up to 2007 (start of insitu bt data)

habitat.gam <- gam(mean_cpue ~ s(weighted_mean_sst, k=10) + s(mean_bt, k=10) + mean_tri + mean_sed, data=habitat, method="REML")
summary(habitat.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_cpue ~ s(weighted_mean_sst, k = 10) + s(mean_bt, k = 10) + 
##     mean_tri + mean_sed
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.47640    1.15512   3.875 0.000177 ***
## mean_tri     0.20742    0.35491   0.584 0.560078    
## mean_sed    -0.03175    0.02769  -1.147 0.253933    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                        edf Ref.df     F p-value  
## s(weighted_mean_sst) 2.847  3.571 3.348  0.0141 *
## s(mean_bt)           1.001  1.001 1.964  0.1638  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.122   Deviance explained = 16.4%
## -REML = 242.09  Scale est. = 2.8472    n = 122
# Partial-effect plots; seWithMean includes intercept uncertainty in the CIs.
plot(habitat.gam, seWithMean = TRUE)

## currents (shelf vol./GSI)
# Multivariable currents GAM: monthly mean CPUE on smooths of shelf-water
# volume (mean_v) and Gulf Stream index (GSI) plus linear tri/sed terms.
df.gsi <- read.csv(here::here('data/gulf_stream_index/df.gsi.csv')) #with tri and sed
df.gsi <- tidyr::drop_na(df.gsi)


# `shlfvol_nolag` is built earlier in the file.
currents <- dplyr::full_join(
  shlfvol_nolag, df.gsi, by = join_by(year, month, mean_cpue, mean_tri, mean_sed)) %>%
  dplyr::select(year, month, mean_cpue, mean_v, GSI, mean_tri, mean_sed)

# NOTE(review): the model below uses the pre-cleaned CSV (currents1), not the
# `currents` join built above — confirm which input is intended.
currents1 <- read.csv(here::here('data/gulf_stream_index/currents.csv')) #cleaned up

currents.gam <- gam(mean_cpue ~ s(mean_v, k=10) + s(GSI, k=10) + mean_tri + mean_sed, data=currents1, method="REML")
summary(currents.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## mean_cpue ~ s(mean_v, k = 10) + s(GSI, k = 10) + mean_tri + mean_sed
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2.87795    2.22946   1.291    0.204
## mean_tri    -0.02072    0.66739  -0.031    0.975
## mean_sed     0.02622    0.05891   0.445    0.658
## 
## Approximate significance of smooth terms:
##             edf Ref.df     F p-value
## s(mean_v) 1.000   1.00 1.737   0.194
## s(GSI)    1.505   1.87 0.401   0.589
## 
## R-sq.(adj) =  0.0469   Deviance explained = 13.6%
## -REML = 120.74  Scale est. = 8.3484    n = 49
# Partial-effect plots; seWithMean includes intercept uncertainty in the CIs.
plot(currents.gam, seWithMean = TRUE)

## food availability
# Multivariable food-availability GAM: monthly mean CPUE on smooths of
# microplankton, chlorophyll-a, and SST frontal probability plus linear
# tri/sed terms.
food <- read.csv(here::here('data/food.csv')) #cleaned up with added tri and sed 

food.gam <- gam(mean_cpue ~ s(weighted_mean_micro, k=10) + s(weighted_mean_chl, k=10) + s(weighted_mean_fprob, k=10) + mean_tri + mean_sed, data=food, method="REML")

# NOTE(review): unlike the habitat/currents models, summary(food.gam) is never
# printed here — add it if model diagnostics are wanted in the rendered output.
plot(food.gam)

References:

Hobbs, L., Banas, N. S., Cottier, F. R., Berge, J., & Daase, M. (2020). Eat or sleep: availability of winter prey explains mid-winter and spring activity in an Arctic Calanus population. Frontiers in Marine Science, 7, 541564.